# Code to replicate 

# This is using the V-dem version 13 in CSV format. 
indiv <- read.csv("~/Dropbox/Mac/Downloads/V-Dem-coder-level_csv_v13/coder_level_ds_v13.csv")
dim(indiv)

table(indiv$historical_date)

indiv$year <- as.numeric(substr(indiv$historical_date, 1,4))

table(indiv$year)

cyearcount <- table(indiv$country_id, indiv$year)


testcyear <- indiv[indiv$country_id==3 & indiv$year == 1918,]



indiv$cyear <- paste(indiv$country_id, indiv$historical_date, sep="-")

# Variable we will focus on: Government censorship effort

indiv$v2mecenf
gcen <- subset(indiv, select=c(cyear, country_text_id, country_id, historical_date, coder_id,
                               v2mecenefm, v2mecenefm_beta, v2mecenefm_conf))


table(is.na(gcen$v2mecenefm))
table(is.na(gcen$v2mecenefm_conf))

codecount <- table(gcen$cyear, is.na(gcen$v2mecenefm))

mingcen <- tapply(gcen$v2mecenefm, gcen$cyear, min, na.rm=TRUE)
maxgcen <- tapply(gcen$v2mecenefm, gcen$cyear, max, na.rm=TRUE)
avgconf <- tapply(gcen$v2mecenefm_conf, gcen$cyear, mean, na.rm=TRUE)
sdcode <- tapply(gcen$v2mecenefm, gcen$cyear, sd, na.rm=TRUE)

gcen_cy <- data.frame(cyear=row.names(codecount), coders=codecount[,1])

# Checking order matches
table(names(mingcen)==gcen_cy$cyear)

gcen_cy$mingcen <- mingcen
gcen_cy$maxgcen <- maxgcen
gcen_cy$sdcode <- sdcode
gcen_cy$avgconf <- avgconf

# checking the Inf in the minimum coded corresponds to country-years with no coders
table(gcen_cy$coders, gcen_cy$mingcen==Inf)

# Similar for max and avg
table(gcen_cy$coders, gcen_cy$maxgcen==-Inf)
table(gcen_cy$coders, is.na(gcen_cy$avgconf))

# So let's just focus on the ones with coders
gcen_coded <- gcen_cy[gcen_cy$coders >0,]

# Breaking out country and date
temp <- matrix(unlist(strsplit(gcen_coded$cyear, "-")), ncol=4, byrow=TRUE)

# Usually day/month is 31/12 but sometimes not. is country-year really unique?

gcen_coded$country_id <- temp[,1]
gcen_coded$year <- temp[,2]
gcen_coded$month <- temp[,3]
gcen_coded$day <- temp[,4]

cyearcount <- table(paste(gcen_coded$country_id, gcen_coded$year))

# Restricting to 12-31 codings
gcen_coded <- gcen_coded[gcen_coded$month=="12" && gcen_coded$day=="31",]

gcen_coded$range <- gcen_coded$maxgcen-gcen_coded$mingcen

# Avg coders by year. Something going on from 2005-
ycr <- unique(gcen_coded$year)
plot(ycr, tapply(gcen_coded$coders, gcen_coded$year, mean), type="l")

# Average range by year
plot(ycr, tapply(gcen_coded$range, gcen_coded$year, mean), type="l")

# Average sd by year
plot(ycr, tapply(gcen_coded$sdcode, gcen_coded$year, mean, na.rm=TRUE), type="l")

# Average confidence by year
plot(ycr, tapply(gcen_coded$avgconf, gcen_coded$year, mean), type="l")

cor(gcen_coded$coders, gcen_coded$range)
cor(gcen_coded$coders, gcen_coded$sdcode, use="pairwise.complete.obs")




# Now a general function for this, 
# need to input but variable name and confidence name (could streamline)

make_cy <- function(var, varconf, evar=FALSE){
  minvar <- tapply(var, indiv$cyear, min, na.rm=TRUE)
  maxvar <- tapply(var, indiv$cyear, max, na.rm=TRUE)
  avgconf <- tapply(varconf, indiv$cyear, mean, na.rm=TRUE)
  sdcode <- tapply(var, indiv$cyear, sd, na.rm=TRUE)
  codecount <- table(indiv$cyear, is.na(var))
  
  var_cy <- data.frame(cyear=row.names(codecount), coders=codecount[,1])
  
  var_cy$minvar <- minvar
  var_cy$maxvar <- maxvar
  var_cy$sdcode <- sdcode
  var_cy$avgconf <- avgconf
  
  var_cy$minvar <- minvar
  var_cy$maxvar <- maxvar
  var_cy$sdcode <- sdcode
  var_cy$avgconf <- avgconf
  
 #  Let's just focus on the ones with coders
  var_coded <- var_cy[var_cy$coders >0,]
  
  # Breaking out country and date
  temp <- matrix(unlist(strsplit(var_coded$cyear, "-")), ncol=4, byrow=TRUE)
  
  # Usually day/month is 31/12 but sometimes not. is country-year really unique?
  
  var_coded$country_id <- temp[,1]
  var_coded$year <- temp[,2]
  var_coded$month <- temp[,3]
  var_coded$day <- temp[,4]
  

  # For non election-specific varriables,
  # just restricting to 12-31 codings (others might involve regime change?)
  if (evar==FALSE) var_coded <- var_coded[var_coded$month=="12" && var_coded$day=="31",]
  var_coded$range <- var_coded$maxvar-var_coded$minvar
  return(var_coded)
}



v2elmulpar <- make_cy(indiv$v2elmulpar, indiv$v2elmulpar_conf, evar=TRUE)
# GD election year vars



# gc2 <- make_cy(indiv$v2mecenefm, indiv$v2mecenefm_conf)
# 
# names(gc2)
# 
# ycr <- unique(gc2$year)
# plot(ycr, tapply(gc2$coders, gc2$year, mean), type="l")
# 
# 
# # Average range by year
# plot(ycr, tapply(gc2$range, gc2$year, mean), type="l")
# 
# # Average sd by year
# plot(ycr, tapply(gc2$sdcode, gc2$year, mean, na.rm=TRUE), type="l")
# 
# # Average confidence by year
# plot(ycr, tapply(gc2$avgconf, gc2$year, mean), type="l")

# parban <- make_cy(indiv$v2psparban, indiv$v2psparban_conf)
# ycr <- unique(parban$year)
# plot(ycr, tapply(parban$coders, parban$year, mean), type="l")
# plot(ycr, tapply(parban$range, parban$year, mean), type="l")
# plot(ycr, tapply(parban$avgconf, parban$year, mean), type="l")
# plot(ycr, tapply(parban$sdcode, parban$year, mean, na.rm=TRUE), type="l")
# 
# 
# 
# clac <- make_cy(indiv$v2clacfree, indiv$v2clacfree_conf)
# ycr <- unique(clac$year)
# plot(ycr, tapply(clac$coders, clac$year, mean), type="l")
# 
# plot(ycr, tapply(clac$sdcode, clac$year, mean, na.rm=TRUE), type="l")
# 
# emb <- make_cy(indiv$v2elembaut, indiv$v2elembaut_conf)

plotcoders <- function(vardata){
  ycr <- names(tapply(vardata$coders, vardata$year, mean))
  plot(ycr, tapply(vardata$coders, vardata$year, mean), type="l", xlab="Year", 
       ylab="Avg number of coders for variable")
}

plotconf <- function(vardata, xlim=NULL, ylim=NULL, norm=1){
  ycr <- names(tapply(vardata$avgconf, vardata$year, mean))
  plot(ycr, tapply(vardata$avgconf, vardata$year, mean)/norm, type="l", xlab="Year", 
       ylab="Avg confidence of coders", xlim=xlim, ylim=ylim)
  
}

plotsd <- function(vardata, xlim=NULL, ylim=NULL, norm=1, add=FALSE, col="black"){
  ycr <- names(tapply(vardata$sdcode, vardata$year, mean, na.rm=TRUE))
  if (add==FALSE) plot(ycr, tapply(vardata$sdcode, vardata$year, mean, na.rm=TRUE)/norm, 
                       type="l", xlab="Year", 
       ylab="Avg Standard Deviation of Coder Decisions", xlim=xlim, ylim=ylim, col=col)
  if (add==TRUE) lines(ycr, tapply(vardata$sdcode, vardata$year, mean, na.rm=TRUE)/norm, 
                       type="l", col=col)
}


pdf("~/Dropbox/Apps/Overleaf/media coverage of backsliding (1)/_img/mfcoders.pdf", 
    width=6.5, height=3, family="Times")
par(mfrow=c(1,3), mar=c(3,3,1,1), mgp=c(2, .7, 0))
plotcoders(v2mecenefm)
plotconf(v2mecenefm, ylim=c(0,1))
plotsd(v2mecenefm, ylim=c(0, 1.5))
dev.off()



